import json
import re
from abc import ABC
from lxml import etree

from Util.stringUtil import md5
from paser.Parser import Parser


class CgvParser(Parser, ABC):
    """Parser that extracts pagination links and image records from HTML."""

    def getNextPage(self, html):
        """Return the URL of the next page, or None when there is none.

        The next page is taken to be the first ``<a>`` sibling that follows
        the ``<a class="paginationNowPage">`` element inside
        ``<div class="center_page">``.

        :param html: HTML text of the current listing page.
        :return: The ``href`` value of the next-page link, or ``None`` if
            the XPath query yields no result.
        """
        selector = etree.HTML(html)
        next_page_path = "//div[@class='center_page']/a[@class='paginationNowPage']/following-sibling::a[1]/@href"
        # xpath() returns a (possibly empty) list of matching href values.
        next_page_urls = selector.xpath(next_page_path)
        return next_page_urls[0] if next_page_urls else None

    def getDetailPageList(self, html):
        """Return the image records embedded in the page's preloaded state.

        In the Visual China crawler, the detail-page list is the list of
        images itself. The data is read from the JSON assigned to
        ``window.__PRELOADED_STATE__`` in an inline script, under
        ``searchImage -> data -> list``.

        :param html: HTML text of the listing page.
        :return: A list of dicts with keys ``unique_key`` (the project
            ``md5`` helper applied to the stringified item id), ``src``
            (image URL) and ``createdTime``. An empty list is returned when
            the page contains no preloaded-state script.
        :raises ValueError: If the captured text is not valid JSON
            (``json.JSONDecodeError``).
        :raises KeyError: If the JSON lacks the expected keys.
        """
        # Only the first occurrence is needed, so search() instead of
        # findall(); the dot in "window." is escaped to match literally.
        match = re.search(
            r'window\.__PRELOADED_STATE__ = (.*?)</script><script', html)
        if match is None:
            # No embedded state on this page: nothing to extract.
            return []

        state = json.loads(match.group(1))
        items = state['searchImage']['data']['list']
        return [
            {
                'unique_key': md5(str(item['id'])),
                # Prefix the scheme as before, except when the URL already
                # carries one (avoids producing "http:http://...").
                'src': (item['src']
                        if item['src'].startswith(('http://', 'https://'))
                        else 'http:' + item['src']),
                'createdTime': item['createdTime'],
            }
            for item in items
        ]
